# Long-format table of the rows where `fund.on.data.history.` is FALSE:
# one row per run, step, `share.data.` value and metric (`name`/`value`).
pdata_sharing <- df %>%
  filter(!fund.on.data.history.) %>%
  select(
    .run.number., .step., share.data.,
    mean.grants.groups:sum..total.primary.publications..of.groups
  ) %>%
  pivot_longer(
    cols = -c(.run.number., .step., share.data.),
    names_to = "name",
    values_to = "value"
  ) %>%
  # discard every row that still contains a missing value
  drop_na()

# Smoothed trend over `.step.` for every metric whose name contains "gini",
# one panel per metric, coloured by `share.data.`.
pdata_sharing %>%
  filter(str_detect(string = name, pattern = "gini")) %>%
  ggplot(mapping = aes(x = .step., y = value, colour = share.data.)) +
  geom_smooth() +
  facet_wrap(facets = vars(name), nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Inequality is lower when data is shared.

# Same "gini" metrics as individual trajectories: one line per
# `.run.number.`, coloured by `share.data.`, rendered interactively.
p <- ggplot(
  data = filter(pdata_sharing, str_detect(name, "gini")),
  mapping = aes(
    x = .step.,
    y = value,
    colour = share.data.,
    group = .run.number.
  )
) +
  geom_line() +
  facet_wrap(facets = vars(name), nrow = 2)
plotly::ggplotly(p)
# data sharing with funding reward
# Long-format table of the rows where `share.data.` is TRUE: one row per
# run, step, `fund.on.data.history.` value and metric (`name`/`value`).
data_funding <- df %>%
  filter(share.data.) %>%
  select(
    .run.number., .step., fund.on.data.history.,
    mean.grants.groups:sum..total.primary.publications..of.groups
  ) %>%
  pivot_longer(
    cols = -c(.run.number., .step., fund.on.data.history.),
    names_to = "name",
    values_to = "value"
  ) %>%
  # discard every row that still contains a missing value
  drop_na()

# Smoothed trend over `.step.` for every metric whose name contains "gini",
# one panel per metric, coloured by `fund.on.data.history.`.
data_funding %>%
  filter(str_detect(string = name, pattern = "gini")) %>%
  ggplot(
    mapping = aes(x = .step., y = value, colour = fund.on.data.history.)
  ) +
  geom_smooth() +
  facet_wrap(facets = vars(name), nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# inequality is higher when funding rewards data sharing (all runs here share data)
# Per-run trajectories of the "gini" metrics: one line per `.run.number.`,
# coloured by `fund.on.data.history.`, rendered interactively.
p <- ggplot(
  data = filter(data_funding, str_detect(name, "gini")),
  mapping = aes(
    x = .step.,
    y = value,
    colour = fund.on.data.history.,
    group = .run.number.
  )
) +
  geom_line() +
  facet_wrap(facets = vars(name), nrow = 2)
plotly::ggplotly(p)
# compare three
# Long-format table over all rows of `df`, keeping both condition flags so
# the runs can be labelled by experiment afterwards.
comparison <- df %>%
  select(
    .run.number., .step., fund.on.data.history., share.data.,
    mean.grants.groups:sum..total.datasets..of.groups
  ) %>%
  pivot_longer(
    cols = -c(.run.number., .step., fund.on.data.history., share.data.),
    names_to = "name",
    values_to = "value"
  ) %>%
  # discard every row that still contains a missing value
  drop_na()

# Collapse the two condition flags into a single `experiment` label and
# overwrite `comparison` with the labelled table. The assignment is written
# up front (instead of a trailing `->`) so the overwrite is visible at the
# start of the statement.
comparison <- comparison %>%
  mutate(experiment = case_when(
    !share.data. & !fund.on.data.history. ~ "no sharing",
    share.data. & !fund.on.data.history. ~ "only sharing",
    share.data. & fund.on.data.history. ~ "share and reward",
    # anything else (e.g. reward without sharing) gets no label
    TRUE ~ NA_character_
  )) %>%
  select(-share.data., -fund.on.data.history.) %>%
  # removes the unlabelled rows produced by the fall-through branch above
  drop_na()

# Smoothed trend over `.step.` for every metric whose name contains "gini",
# one panel per metric, coloured by the `experiment` label.
comparison %>%
  filter(str_detect(string = name, pattern = "gini")) %>%
  ggplot(mapping = aes(x = .step., y = value, colour = experiment)) +
  geom_smooth() +
  facet_wrap(facets = vars(name), nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Almost no difference. Possible explanation: publications and datasets are closely linked (runs with more publications also have more datasets). The groups that are successful are also the ones that share (because one funder demands it). Chance/error also still has a large influence. We could play with the number of funders and with the share of funders that mandate data sharing.

Next thing to look at: play with the rate of data history vs. publication history, and display the fraction of data grants and normal grants.

Q on experiments

Does funding based on history lead to higher concentration, i.e., do groups that got grants with data sharing tend to get more grants of the same sort?